*Prepares entropy data
*Version 15 Stata

*Turn off output paging so the do-file runs without pausing
set more off

*Establish working directories
*NOTE(review): these look like placeholder folder names. Every -cd- below is
*	resolved against the current working directory, so confirm they point
*	to the intended locations (absolute paths are safest).
local d3 Geographic Data
local d5 Antidepressant Data
local d22 Entropy Data

*Bring in data (basically raw antidepressant data)
*The directory is quoted because the folder name contains a space.
cd "`d5'"
use "provider_product_cat_allyears.dta", clear

*******************************************************************************
*SAMPLE SELECTION
*Drop low-volume prescribers: a provider stays in the sample only if the
*	provider's scripts sum to 12 or more in at least one year
*******************************************************************************
*Script total for each provider-year (repeated on every row of the group)
bysort provider_id year: egen tot_scripts_year = total(tot_scripts)
*Largest yearly total observed for each provider
bysort provider_id (year): egen max_tot_scripts_allyears = max(tot_scripts_year)
keep if max_tot_scripts_allyears >= 12
*Snapshot of the filtered data in a temporary file
tempfile temp2
save `temp2', replace

*******************************************************************************
*SAMPLE SELECTION
*Restrict to patients aged 10+ by removing the two youngest age bands
*******************************************************************************
drop if inlist(patient_age, "00-02", "03-09")

*Aggregate to one row per provider-product-year, summing the script counts
collapse (sum) tot_scripts new_scripts, by(provider_id product year)

*******************************************************************************
*SAMPLE SELECTION
*Keep only physicians who appear in the AMA file + valid geographic information
*Note: The variable county_recode provides the "raw" county id for each provider.
*		We have not imposed the county collapse procedure.
*******************************************************************************
*The directory is quoted because the folder name contains a space.
cd "`d3'"
*Attach provider-level records; many provider-product-year rows share one
*	provider record.
merge m:1 provider_id using "verified_county_all.dta"
*Retain only providers found in both the prescription data and the using file
keep if _merge==3
drop _merge
rename county_recode county_raw

*******************************************************************************
*SAMPLE SELECTION
*Define specialty, then exclude "exclude" specialties
*******************************************************************************
generate specialty = ""

*Excluded specialties are removed from the sample outright
drop if inlist(provider_speciality_descrip, "DENTIST", "VETERINARIAN", "NOT APPLICABLE", "UNSPECIFIED")

*Group the remaining raw specialty descriptions into broad categories.
*Each raw description appears in at most one list, so the order of the
*	replace statements does not matter.
replace specialty = "Primary Care" if ///
	provider_speciality_descrip=="FAMILY MEDICINE" | ///
	provider_speciality_descrip=="INTERNAL MEDICINE" | ///
	provider_speciality_descrip=="GENERAL PRACTICE" | ///
	provider_speciality_descrip=="PEDIATRICS" | ///
	provider_speciality_descrip=="INTERNAL MEDICINE/PEDIATRICS" | ///
	provider_speciality_descrip=="GERIATRIC MEDICINE (INTERNAL MEDICIN" | ///
	provider_speciality_descrip=="HOSPITALIST" | ///
	provider_speciality_descrip=="GERIATRIC MEDICINE (FAMILY MEDICINE)" | ///
	provider_speciality_descrip=="GENERAL PREVENTIVE MEDICINE" | ///
	provider_speciality_descrip=="SLEEP MEDICINE" | ///
	provider_speciality_descrip=="INTERNAL MEDICINE/FAMILY MEDICINE" | ///
	provider_speciality_descrip=="DEVELOPMENTAL/BEHAVIORAL PEDIATRICS" | ///
	provider_speciality_descrip=="ADOLESCENT MEDICINE (PEDIATRICS)" | ///
	provider_speciality_descrip=="INTERNAL MEDICINE/PREVENTIVE MEDICIN" | ///
	provider_speciality_descrip=="ADOLESCENT MEDICINE (INTERNAL MEDICI" | ///
	provider_speciality_descrip=="INTERNAL MEDICINE/EMERGENCY MEDICINE"

replace specialty = "Psychiatry" if ///
	provider_speciality_descrip=="PSYCHIATRY" | ///
	provider_speciality_descrip=="CHILD & ADOLESCENT PSYCHIATRY" | ///
	provider_speciality_descrip=="GERIATRIC PSYCHIATRY" | ///
	provider_speciality_descrip=="ADDICTION PSYCHIATRY" | ///
	provider_speciality_descrip=="FORENSIC PSYCHIATRY" | ///
	provider_speciality_descrip=="PSYCHIATRY/NEUROLOGY" | ///
	provider_speciality_descrip=="ADDICTION MEDICINE" | ///
	provider_speciality_descrip=="PED/PSYCH/CHILD & ADOLESCENT PSYCH" | ///
	provider_speciality_descrip=="INTERNAL MEDICINE/PSYCHIATRY" | ///
	provider_speciality_descrip=="PSYCHOANALYSIS" | ///
	provider_speciality_descrip=="PSYCHIATRY/FAMILY MEDICINE" | ///
	provider_speciality_descrip=="PAIN MEDICINE (PSYCHIATRY)" | ///
	provider_speciality_descrip=="BEHAVIORAL HEALTH & SOCIAL SERVICES"

replace specialty = "Non Physician" if inlist(provider_speciality_descrip, ///
	"NURSE PRACTITIONER", "PHYSICIAN ASSISTANT", "CLINICAL NURSE SPECIALIST", ///
	"PSYCHOLOGY", "REGISTERED NURSE", "ADVANCED REGISTERED NURSE", ///
	"LICENSED PRACTICAL NURSE", "LICENSED VOCATIONAL NURSE")

replace specialty = "Other Medical" if ///
	provider_speciality_descrip=="EMERGENCY MEDICINE" | ///
	provider_speciality_descrip=="CARDIOVASCULAR DISEASE" | ///
	provider_speciality_descrip=="PHYSICAL MEDICINE & REHABILITATION" | ///
	provider_speciality_descrip=="OBSTETRICS & GYNECOLOGY" | ///
	provider_speciality_descrip=="GENERAL SURGERY" | ///
	provider_speciality_descrip=="NEPHROLOGY" | ///
	provider_speciality_descrip=="PULMONARY DISEASE" | ///
	provider_speciality_descrip=="INFECTIOUS DISEASE" | ///
	provider_speciality_descrip=="ANESTHESIOLOGY" | ///
	provider_speciality_descrip=="GASTROENTEROLOGY" | ///
	provider_speciality_descrip=="HEMATOLOGY/ONCOLOGY" | ///
	provider_speciality_descrip=="RHEUMATOLOGY" | ///
	provider_speciality_descrip=="MEDICAL ONCOLOGY" | ///
	provider_speciality_descrip=="ENDOCRINOLOGY, DIABETES & METABOLISM" | ///
	provider_speciality_descrip=="ORTHOPEDIC SURGERY" | ///
	provider_speciality_descrip=="DERMATOLOGY" | ///
	provider_speciality_descrip=="PULMONARY CRITICAL CARE MEDICINE" | ///
	provider_speciality_descrip=="UROLOGY" | ///
	provider_speciality_descrip=="OTOLARYNGOLOGY" | ///
	provider_speciality_descrip=="HEMATOLOGY (INTERNAL MEDICINE)" | ///
	provider_speciality_descrip=="PLASTIC SURGERY" | ///
	provider_speciality_descrip=="GYNECOLOGY" | ///
	provider_speciality_descrip=="CRITICAL CARE MEDICINE (IM)" | ///
	provider_speciality_descrip=="RADIATION ONCOLOGY" | ///
	provider_speciality_descrip=="OBSTETRICS"

*Anything not matched above falls into the residual category
replace specialty = "Other" if specialty==""

*******************************************************************************
*SAMPLE SELECTION
*Exclude physicians whose county information is missing
*******************************************************************************
*missing() also catches extended missing values (.a-.z), which a comparison
*	against "." alone would let through. No flag variable is left behind.
drop if missing(county_raw)

*Recode product definitions

*DEFINITION TWO - Product, ignoring salts / dosage mechanism
*Start from the raw product name; products not listed below keep that name.
generate product2 = product

*Products with several formulations collapsed to one name
replace product2 = "Budeprion"      if inlist(product, "BUDEPRION SR", "BUDEPRION XL")
replace product2 = "Bupropion"      if inlist(product, "BUPROPION HCL", "BUPROPION HCL SR", "BUPROPION HCL SR W", "BUPROPION HCL XL")
replace product2 = "Desvenlafaxine" if inlist(product, "DESVENLAFAXINE ER", "DESVENLAFAXINE FUM ER")
replace product2 = "Effexor"        if inlist(product, "EFFEXOR", "EFFEXOR XR")
replace product2 = "Fluvoxamine"    if inlist(product, "FLUVOXAMINE MAL", "FLUVOXAMINE MAL ER")
replace product2 = "Imipramine"     if inlist(product, "IMIPRAMINE HCL", "IMIPRAMINE PAM")
replace product2 = "Limbitrol"      if inlist(product, "LIMBITROL", "LIMBITROL DS")
replace product2 = "Luvox"          if inlist(product, "LUVOX", "LUVOX CR")
replace product2 = "Paroxetine"     if inlist(product, "PAROXETINE CR", "PAROXETINE HCL", "PAROXETINE HCL ER")
replace product2 = "Paxil"          if inlist(product, "PAXIL", "PAXIL CR")
replace product2 = "Prozac"         if inlist(product, "PROZAC", "PROZAC WEEKLY")
replace product2 = "Remeron"        if inlist(product, "REMERON", "REMERON SOLTAB")
replace product2 = "Tofranil"       if inlist(product, "TOFRANIL", "TOFRANIL-PM")
replace product2 = "Venlafaxine"    if inlist(product, "VENLAFAXINE HCL", "VENLAFAXINE HCL ER")
replace product2 = "Wellbutrin"     if inlist(product, "WELLBUTRIN", "WELLBUTRIN SR", "WELLBUTRIN XL")

*Single-formulation products: only the salt suffix is dropped
replace product2 = "Chlorpromazine" if product=="CHLORPROMAZINE HCL"
replace product2 = "Olanz/Fluox"    if product=="OLANZAP/FLUOX HCL"
replace product2 = "Quetiapine"     if product=="QUETIAPINE FUM"
replace product2 = "Thioridazine"   if product=="THIORIDAZINE HCL"
replace product2 = "Trifluoperazin" if product=="TRIFLUOPERAZIN HCL"
replace product2 = "Ziprasidone"    if product=="ZIPRASIDONE HCL"

*DEFINITION THREE - Product molecule, as defined by active ingredient
*NOTE: As of January 2018, fix the product3 definition for three products: Bupropion, CLOMIPRAMINE HCL, and ADAPIN.
*Start from product2; products not listed below keep their product2 name.
*Rules are grouped by target molecule. Every rule tests a distinct product2
*	value, so the order of the statements does not affect the result.
generate product3 = product2

replace product3 = "AMITRIPTYLINE HCL"  if inlist(product2, "ELAVIL", "Limbitrol")
replace product3 = "AMOXAPINE"          if product2=="ASENDIN"
replace product3 = "Bupropion"          if inlist(product2, "Budeprion", "APLENZIN", "FORFIVO XL", "Wellbutrin")
replace product3 = "CITALOPRAM HBR"     if product2=="CELEXA"
replace product3 = "CLOMIPRAMINE HCL"   if inlist(product2, "CLOMIPRAMINE", "ANAFRANIL")
replace product3 = "DESIPRAMINE HCL"    if product2=="NORPRAMIN"
replace product3 = "Desvenlafaxine"     if inlist(product2, "KHEDEZLA ER", "PRISTIQ")
replace product3 = "DOXEPIN HCL"        if inlist(product2, "ADAPIN", "SINEQUAN")
replace product3 = "DULOXETINE HCL"     if product2=="CYMBALTA"
replace product3 = "ESCITALOPRAM OXAL"  if product2=="LEXAPRO"
replace product3 = "FLUOXETINE HCL"     if inlist(product2, "Prozac", "SENTROXATINE")
replace product3 = "Fluvoxamine"        if product2=="Luvox"
replace product3 = "Imipramine"         if product2=="Tofranil"
replace product3 = "MAPROTILINE HCL"    if product2=="LUDIOMIL"
replace product3 = "MIRTAZAPINE"        if product2=="Remeron"
replace product3 = "NEFAZODONE HCL"     if product2=="SERZONE"
replace product3 = "NORTRIPTYLINE HCL"  if product2=="PAMELOR"
replace product3 = "Paroxetine"         if inlist(product2, "Paxil", "PEXEVA")
replace product3 = "PERPHENAZN/AMITRIP" if inlist(product2, "ETRAFON", "TRIAVIL")
replace product3 = "PHENELZINE SULF"    if product2=="NARDIL"
replace product3 = "PROTRIPTYLINE HCL"  if product2=="VIVACTIL"
replace product3 = "SERTRALINE HCL"     if product2=="ZOLOFT"
replace product3 = "TRANYLCYPRO SULF"   if product2=="PARNATE"
replace product3 = "TRAZODONE HCL"      if inlist(product2, "DESYREL", "OLEPTRO", "TRAZAMINE")
replace product3 = "TRIMIPRAMINE MAL"   if product2=="SURMONTIL"
replace product3 = "Venlafaxine"        if product2=="Effexor"

*DEFINITION FOUR - Product class
*NOTE: SSRI, SNRI, NDRI, SARI, with tricyclics and tetracyclics together
*	I coded Brintellix as SSRI.
*	I coded Nefazodone as SARI given same action as Trazodone.
generate product4 = ""
replace product4 = "SSRI" if inlist(product3, "CITALOPRAM HBR", "ESCITALOPRAM OXAL", "FLUOXETINE HCL", "Fluvoxamine", "Paroxetine", "SERTRALINE HCL", "VIIBRYD", "BRINTELLIX")
replace product4 = "SNRI" if inlist(product3, "DULOXETINE HCL", "Desvenlafaxine", "FETZIMA", "Venlafaxine")
replace product4 = "NDRI" if product3=="Bupropion"
replace product4 = "SARI" if inlist(product3, "TRAZODONE HCL", "NEFAZODONE HCL")
replace product4 = "MAOI" if inlist(product3, "EMSAM", "MARPLAN", "PHENELZINE SULF", "TRANYLCYPRO SULF")
*Catch-all: every molecule not assigned a class above is labelled "TCA"
replace product4 = "TCA" if product4==""

*DEFINITION FIVE - "Big" sellers and others
*NOTE: There are 11 big sellers by molecule; other molecules are grouped into 
*	an "other" category.
*The 11 names are split across two inlist() calls because inlist() accepts
*	only a limited number of string arguments.
generate product5 = product3
replace product5 = "Other" if ///
	!inlist(product3, "SERTRALINE HCL", "CITALOPRAM HBR", "FLUOXETINE HCL", "ESCITALOPRAM OXAL", "Paroxetine", "Venlafaxine") & ///
	!inlist(product3, "DULOXETINE HCL", "Bupropion", "TRAZODONE HCL", "AMITRIPTYLINE HCL", "MIRTAZAPINE")

*Label variables
label variable product "Product: Raw IMS product"
label variable product2 "Product: Without salt / mechanism"
label variable product3 "Product: Active ingredient"
label variable product4 "Product: Class"
label variable product5 "Product: Top sellers"

*Save final file
*The directory is quoted because the folder name contains a space.
cd "`d22'"
save "entropy_data_final_we.dta", replace

*Calculate entropy for regressions with BCBS data
*******************************************************************************

*Bring in data
*The directory is quoted because the folder name contains a space.
cd "`d22'"
use "entropy_data_final_we.dta", clear

*Calculate prescribing entropy for each provider-year
*Select drug product definition (product3 = active ingredient)
local drug product3

*Pull provider-level information for merging later
*NOTE: We do this to ease computational burden.
*NOTE(review): the tempfile is saved but never merged back within this
*	do-file; confirm whether it is still needed.
preserve
by provider_id, sort: keep if _n==1
*Keeps provider_id plus every variable stored between provider_yob and
*	specialty in the dataset's current variable order
keep provider_id provider_yob-specialty
tempfile provider
save `provider', replace
restore

*Collapse data into product, provider, and year
*NOTE: Rows that share the same `drug' value within a provider-year are
*	summed into a single row.
collapse (sum) tot_scripts, by(provider_id year `drug')

*Hard code global mental health drug products across years
*Used as the log base so that an even spread over this many products
*	would give an entropy of 1.
local drug_products=33

*Generate total yearly prescriptions by provider
bysort provider_id year: egen provider_year_scripts = total(tot_scripts)

*Generate entropy measures
*share is each product's fraction of the provider's scripts in that year.
*ln(0) is missing, and egen total() counts missing as 0, so a product with
*	a zero share contributes nothing to the sum.
generate share = tot_scripts/provider_year_scripts
generate entropy_term = -(share*ln(share))/ln(`drug_products')
bysort provider_id year: egen entropy_all_global = total(entropy_term)

*Keep one record per provider/year (entropy is constant within the group)
bysort provider_id year: keep if _n==1

*parse variables
*The original kept "entropy_molecule_avg", which is never created in this
*	do-file and made the script stop with a variable-not-found error.
*	The measure computed above is entropy_all_global.
*NOTE(review): confirm the variable name that tables_III_IV.do expects.
keep provider_id year entropy_all_global

*save file; to be exported and used later in tables_III_IV.do
save "Princeton_entropy_master_file.dta", replace

